** Data cleaning for The Effects of Changes in Local Bank Health on Household Consumption
** by Daniel Cooper and Joe Peek, ReStat 2020
/* ////////////////////////////////////////////////////////////////////////// */
local version revision_hmr
local filename "main_dataset_`version'"

clear all
set more off
set maxvar 120000

/* This code merges together all non-equifax data and creates the main dataset(s) 
for the regressions.
 
It must be run twice: 
for the Full sample [with hw = 1 and hw_fw = 0]
and for the Wealth sample [with hw = 0 and hw_fw = 1]
(see line 38 below) 

In the cleaning block we drop various oddities in the data (such as weird house value jumps/drops) 
for completeness, however most of these adjustments do not affect many observations
Many of the sample restrictions are discussed in the paper*/

* the code is divided into blocks for ease of execution
********************************************************************************
scalar merging = 0 // merging all data together
scalar cleaning = 0 // further data cleaning, sample restrictions, timming/winsorizing
scalar variables = 0
	scalar make = 0 // makes constraint vars and interacts regression vars with constrained, liquid asset holdings
	scalar label = 0 // labeling regression variables for output
********************************************************************************

* update with relevant file paths
local data /shared/Joe_Peek/bankhealth_nocamels/data
local camsdir /shared/Joe_Peek/bankhealth/bankhealth/data
local psid ~/PSID_new

* Age for constraint cutoff
local age_cut 30
	local `age_cut' _30

*Choose which dataset to run - must run each individually 
scalar hw = 1       // Housing Wealth, aka "Full Sample"
scalar hw_fw = 0     // Housing Wealth and Financial Wealth, aka "Wealth Sample"

********************************************************************************
if merging {
** Merge bank health data with the broader PSID data set **
use `data'/bhdat_2015_v4, clear
sort unique year

ren (liqw_alt l_liqw_alt fvaca) (liqw_combo l_liqw_combo rvaca)

*merge in broader (BPP) imputed consumption measure, see Readme_BPP for how to construct
merge 1:1 unique year using `data'/bpp_consv5, keep(1 3) nogen
ren broadc_bppv2 bpp

*generate quarters based on interview month
gen qtr = 1 if intmo >=1 & intmo <=3
replace qtr = 2 if intmo>=4 & intmo <=6
replace qtr = 3 if intmo >=7 & intmo <=9
replace qtr = 4 if intmo >=10 & intmo <=12

sort state year qtr

*merge in hempstat and wempstat (head and wife employment status)
drop hempstat 
merge 1:1 unique year using `psid'/Employment/hmr_psid_employment_fin, keepusing(hempstat wempstat) keep(1 3) nogen

********************************************************************************
*currently have the public state data. Replacing with restricted data (FIPS) so it will be 
* consistent with other MSA and tract data

*merge in restricted PSID data
merge 1:1 unique year using "`data'/statefips_15.dta", keep(3) nogen
rename FIPSTATE statefips 

replace statefips = . if statefips == 98 | statefips == 99 
replace state = . if state == 0

*crosswalk - change from FIPS codes to PSID state codes
g state_psid = statefips if statefips==1
replace state_psid = 50 if statefips==2
replace state_psid = statefips-2 if statefips<=6 & statefips>2
replace state_psid = statefips-3 if statefips<15 & statefips>6
replace state_psid = 51 if statefips==15
replace state_psid = statefips-5 if statefips<43 & statefips>15
replace state_psid = statefips-6 if statefips>43 & statefips<52
replace state_psid = statefips-7 if statefips>52 & statefips<=56

drop if statefips == . 

drop state
ren (state_psid statefips) (state fipstate)

********************************************************************************
* run bankhealth_lags.do first

*Bankhealth values for each state
merge m:1 state year qtr using `data'/bankh_state_lags_fin_hmr_15.dta, keep(1 3) nogen
sort state year qtr

*bankhealth values for each state excluding MSAs
merge m:1 state year qtr using `data'/bankh_nmst_lags_fin_hmr_15.dta, keep(1 3) nogen
sort unique year

*MSA level bankhealth
merge 1:1 unique year qtr using `data'/bankh_merged_msa_fin_hmr_15.dta, keep(1 3) nogen

**********************************************************
*merge in CAMELS data
/* [confidential regulatory data so no code can be provided]
if you get all the other data but not CAMELS, comment out all camels references
and take cam out of the loops in the variables section */

sort state year qtr
*Non-MSA State
merge m:1 state year qtr using "`data'/cam_nmst_lags_15_hmr.dta", keep(1 3) nogen

*MSA 
merge m:1 unique year using "`data'/cam_msa_merge_15_hmr.dta", keep(1 3) nogen
sort state year qtr

*State
merge m:1 state year qtr using "`data'/cam_state_lags_15_hmr.dta", keep(1 3) nogen

***************************************
*merge in house price growth data at msa level (CoreLogic)
* (FHFA (public) house price data could also be substituted)
* run hpi.do first 
merge m:1 unique year using "`data'/hpi_msa_merge_15.dta", keep(1 3) nogen 
merge m:1 state year qtr using "`data'/corelogic_hpi_fin_15.dta", keep(1 3) nogen
xtset unique year

*********************************************************************
*merge in tract change data - this is used to correct the moving variable
* constructed in restricted data (see comment below)
merge 1:1 unique year using "`data'/tr_ch_v3_2015.dta", keep(1 3) nogen

********************************************************************
*merging in bartik employment growth
*run emp_bartik_naic_sic.do first

*MSA level data 
merge 1:1 year qtr unique using `data'/bartik_msa_psid_back_15, keep(1 3) nogen
merge 1:1 year qtr unique using `data'/bartik_msa_psid_for_15, keep(1 3) nogen

*state level data
merge m:1 year qtr state using `data'/bartik_st_weighted_back, keep(1 3) nogen
merge m:1 year qtr state using `data'/bartik_st_weighted_for, keep(1 3) nogen

*********************************************************************
/*samehead and newwife variables sometimes coded as new head/wife when person exits 
PSID for several waves and then reenters, even if same head/wife
	make sure age is on the same path - if so, change such that head/wife does not change.
	samehead variable used below to determine if 'new' household under our definition
*created in psid_samehead_fix.do*/

merge 1:1 unique year using `psid'/samehead_fix, keep(1 3) nogen

*********************************************************************
*merging in the variables for the age earnings profiles
*run psid_ageearnings.do first

*get deviations from age-earnings profiles for head and wife
*also get retired, student, nilf, and business earnings designations for head and wife (created in a-e process)
*also corrected age: 
	*from individual rather than HH file
	*and corrected in ageearnings where data was by individual not HH. so had entire history with which to adjust 
drop ageh agew

merge 1:1 unique year using `data'/age_earning_profiles_posttax_15_hmr.dta, keepusing(wper_dev_med hper_dev_med wretired hretired wstudent hstudent wnilf hnilf wbus hbus hage wage hhper_dev_med avgsqdev lnyhat) keep(1 3) nogen
*merge _1 is years and latino sample. will all be dropped later anyways. 

ren (hage wage) (ageh agew)

*increase age by 1: currently lagged to calculate age-earnings
*now want consistent timing t as other individual level/demographics controls
replace ageh = ageh + 1
replace agew = agew + 1 

*********************************************************************
*creating a variable that shows how many observations a household has in the raw PSID sample
bysort unique: egen raw_obs = count(unique) if ((famincc!=. & year<1985) | (famincratc!=. & year>1984)) & ageh!=.

*********************************************************************
g qdate = yq(year, qtr)
format qdate %tq

*many of the non-merges because no interview month. and thus no measures that line up with a quarter (ex BH, HP, emp)
drop if intmo == . 

*********************************************************************
* merge in cleaned education data
* see psid_educ_data_cleaning.do
merge 1:1 unique year using `psid'/Education/education_hmr_fin.dta, keep(1 3) keepusing(educh educw) nogen

save `data'/merged_data_`version'_15.dta, replace
}

********************************************************************************
if cleaning {

foreach dataset in hw hw_fw {

if `dataset'==1 {

use `data'/merged_data_`version'_15.dta
di "`dataset'"

*keep 1984 so can look at lags. later will limit to 1985. 
drop if year < 1984 

xtset unique year 

*Split into seperate households if the head changes
*Don't want income jumps etc. if actually because head changed 
g newhh = 1 if samehead == 0 
replace newhh = 1 if unique != unique[_n-1] 

egen unique2 = group(unique newhh year)
carryforward unique2, replace
g old_unique = unique // preserve old unique codes as well (need for latino sample) 
replace unique = unique2
drop unique2

////////////////////////////////////////////////////////////////////////////////
// Income variable 
*faminc was bottom coded to 1 for some years if it was negative, but not for others. 
* So this manipulation drops the negative income values. 
*Note: We don't need to drop the lags of faminc because faminc is already lagged
replace famincratc=. if famincc==1

* the timing for famincrat is sometimes off, famincratc has the correct timing. 
replace famincrat=famincratc

*log income
replace ln_famincrat = ln(famincrat)

////////////////////////////////////////////////////////////////////////////////
// Fixing the moving variable based on tract data
*tr_ch (was created with confidential geo data) is a count of the different tracts 
* a household is in. The first observation is equal to 1, and everytime a household's 
* tract changes, tr_ch increases by 1.
xtset unique year

g tr_ch_dum=0
replace tr_ch_dum = 1 if tr_ch!=L.tr_ch & tr_ch!=. & L.tr_ch!=. & year<1998
replace tr_ch_dum = 1 if tr_ch!=L2.tr_ch & tr_ch!=. & L2.tr_ch!=. & year>1998

*always replacing moved if tr_ch_dum = 1 so that way don't have to worry about the 
* start and end dates to our variables; also they could have moved within the tract, 
* so that wouldn't be caught by tr_ch, but hopefully would be by moved
g moved_original = moved
replace moved=1 if moved==0 & tr_ch_dum==1
g moved_diff = moved - moved_original

////////////////////////////////////////////////////////////////////////////////
** Changing lags on certain variables before we drop observations.
* adjust for ease of running regressions given different timing in PSID

// Wealth
* adjust lag totalfw, liqw (so 1990 has 89 fin wealth, and 1995 has 94 fin wealth, and 1999 has no wealth)
foreach v in totalfw liqw_combo {
replace l_`v' = L.`v' if year==1985
replace l_`v' = . if year==1989
replace l_`v' = L.`v' if year==1990
replace l_`v' = . if year==1994
replace l_`v' = L.`v' if year==1995
replace l_`v' = . if year==1999

replace `v' = . if (year>1984 & year<1989) | (year>1989 & year<1994) | (year>1994 & year<1999)
} 

* remove remaining topcoding for fw variables
replace totalfw=. if w_ira==999998 |  w_ore==999998 |  w_farm==999998 |  w_stk==999998 | w_cash==999998 |  w_bond==999998 | w_odebt==999998 | w_vehic==999998

*Setting IRA wealth prior to 1999 as missing
* b/c prior to 1999 the PSID included the value of IRAs in its measure of households’ stock wealth
replace w_ira = . if year < 1999

* Total Assets (house value plus financial assets) 
egen totalfa = rowtotal(hvalue w_ore w_farm w_stk w_cash w_bond w_vehic w_ira)
replace totalfa = . if w_ira==999998 |  w_ore==999998 |  w_farm==999998 |  w_stk==999998 | w_cash==999998 |  w_bond==999998 | w_vehic==999998

* Total Debt (mortgage debt plus other debt)
egen totaldebt = rowtotal(rpmort w_odebt)

foreach x in totalfa totaldebt {
replace `x' = . if (year>1984 & year<1989) | (year>1989 & year<1994) | (year>1994 & year<1999)

gen r`x' = `x'/def

by unique: gen l_r`x' = L2.r`x' if year > 1999
by unique: replace l_r`x' = L.r`x' if year < 1999
}

drop w_ore w_farm w_stk w_cash w_bond w_vehic w_ira w_odebt totalfa totaldebt rtotalfa rtotaldebt
////////////////////////////////////////////////////////////////////////////////
** drops for mortgages, house value jumps

*egen was used to create rpmort starting in 1994, but that means when rpmort1st==. 
* and rpmort2nd==0, rpmort==0, then it should be missing
replace rpmort=. if (rpmort1st==. | rpmort2nd==.) & year>1993

#delimit ;

g drop=1 if (rpmort<.25*L.rpmort & rpmort<.25*F.rpmort)
	& L.rpmort!=. & F.rpmort!=. & moved!=1 & F.moved!=1 
	& L.rpmort>=25000 & F.rpmort>=25000 & howner!=. & hvalue!=0 & year<1997 ;
	
replace drop=1 if (rpmort<.25*L.rpmort & rpmort<.25*F2.rpmort)
	& L.rpmort!=. & F2.rpmort!=. & moved!=1 & F2.moved!=1 
	& L.rpmort>=25000 & F2.rpmort>=25000 & howner!=. & hvalue!=0 & year==1997 ;
	
replace drop=1 if (rpmort<.25*L2.rpmort & rpmort<.25*F2.rpmort)
	& L2.rpmort!=. & F2.rpmort!=. & moved!=1 & F2.moved!=1 
	& L2.rpmort>=25000 & F2.rpmort>=25000 & howner!=. & hvalue!=0 & year>1998 ;

bysort unique: egen drop_max = max(drop) ;

g jump=1 if (rpmort>4*L.rpmort & rpmort>4*F.rpmort) 
	& rpmort!=. & L.rpmort!=. & F.rpmort!=. & L.rpmort!=0 & F.rpmort!=0 
	& moved!=1 & F.moved!=1 & rpmort>=25000 & howner!=. & hvalue!=0 & year<1997 ;
	
replace jump=1 if (rpmort>4*L.rpmort & rpmort>4*F2.rpmort) 
	& rpmort!=. & L.rpmort!=. & F2.rpmort!=. & L.rpmort!=0 & F2.rpmort!=0 
	& moved!=1 & F2.moved!=1 & rpmort>=25000 & howner!=. & hvalue!=0 & year==1997 ;
	
replace jump=1 if (rpmort>4*L2.rpmort & rpmort>4*F2.rpmort) 
	& rpmort!=. & L2.rpmort!=. & F2.rpmort!=. & L2.rpmort!=0 & F2.rpmort!=0 
	& moved!=1 & F2.moved!=1 & rpmort>=25000 & howner!=. & hvalue!=0 & year>1998 ;

#delimit cr

bysort unique: egen jump_max = max(jump)

g adj=1 if (jump==1 & L.drop==1) | (L.jump==1 & drop==1) & year<1998
replace adj=1 if (jump==1 & L2.drop==1) | (L2.jump==1 & drop==1) & year>1998

bysort unique: egen adj_max = max(adj)

* drop obs where the house value jumps/drops, using our rule of x4/x0.25
* see discussion in the appendix

g ratio = hvalue/L.hvalue if howner==1 & year<1998
replace ratio = hvalue/L2.hvalue if year>1998 & howner==1

#delimit ;
g typo_hvalue = 1 if ((ratio>4 & ratio!=. & F.ratio<0.25 & moved!=1 & F.moved!=1) 
	| (ratio<0.25 & F.ratio>4 & F.ratio!=. & moved!=1 & F.moved!=1)) & year<1997 ;
		
replace typo_hvalue = 1 if ((ratio>4 & ratio!=. & F2.ratio<0.25 & moved!=1 & F2.moved!=1) 
	| (ratio<0.25 & F2.ratio>4 & F2.ratio!=. & moved!=1 & F2.moved!=1)) & year>1996 ;
#delimit cr

g l_typo_hvalue = L.typo_hvalue if year<1998
replace l_typo_hvalue = L2.typo_hvalue if year>1998

*Dropping housing value jumps/drops
drop if l_typo_hvalue==1
////////////////////////////////////////////////////////////////////////////////
** drops for illogical houseowner, renter values 
g howner_wrong=1 if hvalue==0 & howner==1
replace hvalue=. if hvalue==0 & howner==1
replace howner_wrong=1 if hvalue!=0 & hvalue!=. & howner==0

if "`dataset'" == "hw" {
	preserve
	g l_howner_wrong= L.howner_wrong if year<1998
	replace l_howner_wrong = L2.howner_wrong if year>1998
	drop if l_howner_wrong == 1
	restore
}

// homeowners who switch to renter for one period in the middle of their time series, and are probably still homeowners
#delimit ;
count if howner==0 & ((L.howner==1 & F.howner==1 & year<1998 & L.moved!=1) 
	| (L2.howner==1 & F2.howner==1 & year>1998 & L2.moved!=1)) & moved!=1 ;
	
replace howner_wrong=1 if howner==0 & ((L.howner==1 & F.howner==1 & year<1998 & L.moved!=1) 
	| (L2.howner==1 & F2.howner==1 & year>1998 & L2.moved!=1)) & moved!=1 ;
	
replace howner=. if howner==0 & ((L.howner==1 & F.howner==1 & year<1998 & L.moved!=1) 
	| (L2.howner==1 & F2.howner==1 & year>1998 & L2.moved!=1)) & moved!=1 ;
#delimit cr
g l_howner_wrong= L.howner_wrong if year<1998
replace l_howner_wrong = L2.howner_wrong if year>1998

drop if l_howner_wrong==1

* adjusting moving if zero but the previous period said owner and now renter or vice versa
replace moved=1 if ((L.howner==1 & howner==0) | (L.howner==0 & howner==1)) & year<1998
replace moved=1 if ((L2.howner==1 & howner==0) | (L2.howner==0 & howner==1)) & year>1998

drop ratio 

////////////////////////////////////////////////////////////////////////////////
*make final drops based on mortgages
g rpmort_wrong=1 if jump==1 | drop==1

g l_rpmort_wrong = L.rpmort_wrong if year<1998
replace l_rpmort_wrong = L2.rpmort_wrong if year>1998

drop if l_rpmort_wrong==1

////////////////////////////////////////////////////////////////////////////////
** dropping high loan to value ratios (LTV>2)
g l_ltv=L.ltv if year<1998
replace l_ltv = L2.ltv if year>1998

drop if l_ltv>2 & l_ltv!=.

////////////////////////////////////////////////////////////////////////////////
* Drop the Latino sample (1990 - 1995)
drop if old_unique >=7001001 & old_unique<=9308399

////////////////////////////////////////////////////////////////////////////////
** Age restriction, keep owners/renters only, drop those missing howner data
drop if l_howner==2
drop if l_howner==.

*getting rid of retirees and students:: ONLY IF BOTH
*NOT NLF 
g hrs = 1 if hretired == 0 & hstudent == 0 
g wrs = 1 if wretired == 0 & wstudent == 0 
drop if wrs != 1 & hrs != 1 
*if there is no wife, then wrs will be missing

*drop if ANYONE in HH has business income
drop if hbus == 1 | wbus == 1 

////////////////////////////////////////////////////////////////////////////////
replace ageh = . if ageh==0 | ageh ==999
gen age2 = ageh^2/1000
gen age3 = ageh^3

gen numkidssq = numkids^2
gen famsizessq = famsize^2

////////////////////////////////////////////////////////////////////////////////
** Indicator variable for dual earner households 

*dual earner if both are 'earners' ~ in the labor force. 
foreach v in h w {
	g `v'earn = 1 if `v'retired != 1 & `v'student != 1 & `v'nilf != 1 
}	

xtset unique year

*if there is no wife, make wearn missing -- lf vars coded as missing if no wife 
*do not want dual earner if there is no wife to begin with
replace wearn = . if wretired == . 

g dualhh = 0 	
replace dualhh = 1 if hearn == 1 & wearn == 1 

* wife indicator
drop wife
g wife = 0 
replace wife = 1 if wretired != .
		*retired variable constructed with no missings in age-earnings
		*so if missing, that means the head had no wife to match with 

*indicator married/cohabitating and single earner or not earner
	*if dualhh == 0 could be because neither head or wife is an earner
g m_se = 0
replace m_se = 1 if dualhh == 0 & wife == 1
replace m_se = . if dualhh == . | wife == .

*indicator dual earner
g m_de = 0
replace m_de = 1 if dualhh == 1 
replace m_de = . if dualhh == . 

////////////////////////////////////////////////////////////////////////////////
** consumption measures 

*prior to 1999, most of the pexpn contributing variables don't exist 
foreach var in pexpn rhealth rtransp rchildcare rschool rcarcost rutil {
	replace `var' = . if year < 1999
}

*other consumption measures that start in 2005 
foreach var in rhrepair rhfurn rhrec rcloth rvaca {
	replace `var' = . if year < 2005
}

*replacing rfood, bpp to missing in 1989, because we actually don't have data for 1989 food consumption, those values are from 1990 
replace rfood = . if year==1989
replace bpp = . if year == 1989

* log
** bpp is already a log
gen ln_psidc_food = ln(rfood)
gen ln_psidc_hh = ln(pexpn)
gen ln_psidc_hh2 = ln(pexpnf)

* alt (more comprehensive, discretionary) consumption measure
egen altc1 = rowtotal(rhfurn rhrec rvaca rschool rcloth rfood)
gen ln_psidc_altc1 = ln(altc1)

////////////////////////////////////////////////////////////////////////////////
** Wealth to income ratios 
xtset unique year

* adjust income for wealth ratios to be contemporaneous (has to be estimated after 97).
* famincrat2yr = (famincrat + L2.famincrat)/2 after 1997 (done in raw PSID setup)
replace famincrat2yr=famincrat if year<1999
* replace with non average value if before 1999, do have data every year then

* wealth ratios; two year income data in the denominator, l_totalfw and l_liqw are real
gen lfwr_combo = l_liqw_combo/ famincrat2yr
gen lfwr = l_liqw_jl / famincrat2yr
gen fwr = (l_totalfw-l_liqw_jl)/famincrat2yr   
gen hewr = l_rhequity/(famincrat2yr)
gen tfwr = (l_totalfw)/famincrat2yr

* Total wealth is financial wealth for renters and financial wealth plus housing equity for owners.
gen totwr = (l_totalfw)/famincrat2yr if howner != 1
replace totwr = (l_totalfw + l_rhequity)/famincrat2yr if howner == 1

* total assets (house value plus financial assets) and total debt (mortgage debt plus other debt) relative to income
gen totar = l_rtotalfa/famincrat2yr
gen totdr = l_rtotaldebt/famincrat2yr

*drop where the financial wealth jumps/drops, using our rule of x4/x0.25
g ratio = totalfw/L.totalfw if howner == 1 & year < 1998
replace ratio = totalfw/L2.totalfw if howner == 1 & year > 1998

g typo_totalfw = 1 if ((ratio>4 & ratio!=. & F.ratio<0.25) | (ratio<0.25 & F.ratio>4 & F.ratio!=.)) & year<1997
replace typo_totalfw = 1 if ((ratio>4 & ratio!=. & F2.ratio<0.25) | (ratio<0.25 & F2.ratio>4 & F2.ratio!=.)) & year>1996

g l_typo_totalfw = L.typo_totalfw if year<1998
replace l_typo_totalfw = L2.typo_totalfw if year>1998

if "`dataset'"=="hw_fw" drop if l_typo_totalfw==1

* set hewr to zero where l_rhequity==0 & l_howner==0 (they were a renter in the previous period)
replace hewr=0 if l_rhequity==0 & l_howner==0

xtset unique year
////////////////////////////////////////////////////////////////////////////////
** adjusting and creating Bank Health variables

*Note that CAMELS and RENPL have different numbers of obs than NPL because banks 
* didn't answer all survey questions in all years
*In the regression analysis, we will impose the same sample for npl and renpl so 
* directly comparable (imposing renpl's sample)
*CAMELS however will be left with a different number of observations. very different measure to begin with.

#delimit ; 

*Duplicating bank health variables so they have easier names to work with;
foreach n in msa nmst st {;
	*RENPL (ren_asset) and NPL (npl_asset); 
	foreach bh in ren_asset npl_asset {;
	
	if "`bh'" == "ren_asset" local b re_a;
	if "`bh'" == "npl_asset" local b npl_a;

	*Call Report measures renaming ;
	g `b'_50_`n' =   l4_ent_`bh'p50_`n';
	
	foreach p in 50 {;
		*multi splits at bank level: percentiles;
		foreach t in bhc { ;
			g `b'_`p'_5m_`t'_`n' =  l4_M05`t'_`bh'p`p'_`n';
			g `b'_`p'_10m_`t'_`n' =  l4_M10`t'_`bh'p`p'_`n';
		} ;	
	};
	};
	
	
	*CAMELS renaming;
	g cam_wa_`n' = l4_cam_comp_dep_wgtd_`n';
	
	*BNK;
	foreach v in 5m 10m { ;
	g cam_wa_`v'_ent_`n' = l4_cam_comp_`v'_wgtd_`n';
	} ;
	
	*BHC;
	foreach v in 5m 10m { ;
	g cam_wa_`v'_bhc_`n' =  l4_cam_wavg_t`v'_bhc_`n' ;
	} ;

};

local bh_ab "cam re_a npl_a "; 

* Drop other lags ;
drop l1_* l2_* l3_* ;


local bankhealth_trunc 
"
l4_LocShrM05bhc_2_refix

npl_a_50
npl_a_50_5m_bhc 
npl_a_50_10m_bhc 

re_a_50 
re_a_50_5m_bhc 
re_a_50_10m_bhc 

cam_wa
cam_wa_5m_bhc
cam_wa_10m_bhc
";

*MSA value if it exists, otherwise NMST value; 
foreach bh in `bankhealth_trunc' {;
	g `bh'_ev = `bh'_msa if `bh'_msa != .;
	replace `bh'_ev = `bh'_nmst if `bh'_msa == .;	
};

/* /////////////////////////////////////////////////////////////////////////// */
** Renter and Owner dummies ;
g renter = 1 if l_howner==0;
replace renter = 0 if l_howner==1;
g owner=0;
replace owner=1 if renter==0;

/* /////////////////////////////////////////////////////////////////////////// */
** more BH variable cleaning ;

foreach n in msa nmst st ev {;

	foreach b in M05 {;
		foreach t in bhc {;
		
			*making the shares into percentages;
			g l4_LocShr`b'`t'_100refix_`n' = 100*l4_LocShr`b'`t'_2_refix_`n';
			
			*taking the log of percent shares;
			g ln_l4_LocShr`b'`t'_100refix_`n' = ln(l4_LocShr`b'`t'_100refix_`n' + 1);
		};
	};
};

*big local of BH measures again; 
local bankhealth ""; 

foreach n in msa st nmst ev {;

	foreach bh in `bh_ab' {;
	
		if "`bh'"!="cam" local measure 50;
		if "`bh'"=="cam" local measure wa;

		foreach p in `measure' {;
		
			local bankhealth  `bankhealth'
			
			`bh'_`p'_`n'
			
			`bh'_`p'_5m_bhc_`n' 
			`bh'_`p'_10m_bhc_`n' ;
		};
	
	};

};
/* /////////////////////////////////////////////////////////////////////////// */
** Local variables: employment, HP growth ;

// Bartik Employment growth variables: NAIC backwards and SIC forwards;
ds bartik*;
foreach v in `r(varlist)' {;
	replace `v' = `v' / 100;
};

foreach dir in for back {;
	g bartik_empgr_`dir'_ev = bartik_empgr_`dir'_st;
	replace bartik_empgr_`dir'_ev = bartik_empgr_`dir'_msa if bartik_empgr_`dir'_msa !=.;
};

*restricted;
foreach dir in for back {;
	g bartik_empgr_restr_`dir'_ev = bartik_empgr_restr_`dir'_st;
	replace bartik_empgr_restr_`dir'_ev = bartik_empgr_restr_`dir'_msa if bartik_empgr_restr_`dir'_msa !=.;
};

// HPI growth variable;

g hp_ev = hpgrowm_4qtr_st;
replace hp_ev = hpgrowm_4qtr_msa if hpgrowm_4qtr_msa != .;
	
#delimit cr

////////////////////////////////////////////////////////////////////////////////
** Variable drops: missings, negative income etc.

*dropping missing consumption for base sample: food 
drop if ln_psidc_food==.

*drop missings of demographic variables 
drop if famsize==.
drop if dualhh==.
drop if ageh==.

*drop if income missing
drop if famincrat==.

*drop log_income if income is negative
replace ln_famincrat=. if famincrat<0
replace ln_famincrat=. if famincrat2yr<0
replace ln_famincrat=. if (L2.famincrat==0 & year>=1999)
replace ln_famincrat=. if (L2.famincrat<0 & year>=1999)
replace ln_famincrat=. if famincrat==0
drop if ln_famincrat==.

*Drop if consumption zero
drop if rfood==0

*drop missing hewr
drop if hewr==.

*drop missing tfwr
if "`dataset'"=="hw_fw" drop if tfwr==.

* used to construct wealth ratios, drop if missing/less than zero
drop if famincrat2yr==.
drop if famincrat2yr < 0 

////////////////////////////////////////////////////////////////////////////////
** Trim outliers 1% - by year since more in crisis years

* based on food consumption
local hw_fw_var "ln_psidc_food hewr tfwr totwr totar totdr ln_famincrat famincrat2yr"
local hw_var "ln_psidc_food hewr ln_famincrat famincrat2yr"

local cons food
foreach y in ``dataset'_var' {
		forval i=0/1 {
			forvalues x = 1980/2015 {
				_pctile `y' if `y' !=. & year ==`x' & l_howner==`i', percentiles(1(98)99)
				replace `y'=. if (`y' < r(r1) | `y' > r(r2)) & year ==`x' & l_howner==`i' & `y'!=.
		}
	}
}


////////////////////////////////////////////////////////////////////////////////
** Winsorize outliers 1% - by year since more in crisis years
*don't winsorize CAMELS 
* see text for further discussion of trimming/winsorizing

#delimit ;

*BH measures; 
foreach n in ev msa nmst {;
	foreach bh in npl_a re_a  {;
		foreach p in 50 {;
			*For split variables using bank and bhc definitions;
			foreach b in bhc {;
				foreach t in 5m_`b' 10m_`b' {;	
					forvalues y = 1980/2015 {;
						
						qui su `bh'_`p'_`t'_`n' if year == `y', d;
						replace `bh'_`p'_`t'_`n' =r(p99) if `bh'_`p'_`t'_`n'>r(p99) & `bh'_`p'_`t'_`n'!=. & year == `y';
						replace `bh'_`p'_`t'_`n' =r(p1) if `bh'_`p'_`t'_`n'<r(p1) & `bh'_`p'_`t'_`n'!=. & year == `y';
					};	
				};	
			};
				
			*For old non split variables;
			forvalues y = 1980/2015 {;			
				qui su `bh'_`p'_ev if year == `y', d;
				replace `bh'_`p'_`n' =r(p99) if `bh'_`p'_`n'>r(p99) & `bh'_`p'_`n'!=. & year == `y';
				replace `bh'_`p'_`n' =r(p1) if `bh'_`p'_`n'<r(p1) & `bh'_`p'_`n'!=. & year == `y';
			};
		};
	};
};

*HPI and employment growth;
foreach var in hp_ev bartik_empgr_for_ev bartik_empgr_back_ev bartik_empgr_restr_for_ev bartik_empgr_restr_back_ev {;
	
	g `var'_w1=`var';

	forval y = 1980/2015 {;
	
		*one percent tails;
		_pctile `var' if year == `y', percentiles(1, 99);
		replace `var'_w1 = r(r1) if `var'<r(r1) & year == `y' ;
		replace `var'_w1 = r(r2) if `var'>r(r2) & `var'!=. & year == `y';
	};
};

*Income Volatility;
_pctile avgsqdev, percentiles(1, 99);
replace avgsqdev = r(r2) if avgsqdev>r(r2) & avgsqdev!=. ;

* Drop outliers (identified above in trimming outliers section) ;
foreach y in ``dataset'_var' { ;
	drop if `y'==. ;
} ;

////////////////////////////////////////////////////////////////////////////////
** Location dummies: Fixed effects and Cluster SE;

// indicator of who lives in the same locations (MSA or nmst part of a state);
*merge in CBSA dummies from PSID restricted files to use as location fixed effects;
	*different variable for each CBSA, so we can group people who live in the same CBSA; 

*need to merge the cbsa dummies based on the old uniques - that is what is in the confidential PSID;  
ren unique new_unique;
ren old_unique unique;

merge 1:1 unique year using `data'/cbsa_dum_15, nogen keep(1 3);

ren unique old_unique;
ren new_unique unique;

foreach var of varlist cbsa_dum* {;
	replace `var' = 0 if `var' == .;
};

*dummies for NMST locations that have unique IDs;
*if don't have any positive values for any of the cbsa_dummies, then nmst;
egen msa_tot = rowtotal(cbsa_dum*);

levelsof state, local(st);
foreach s in `st' {;
	g nmst_dum`s' = 0;
	replace nmst_dum`s' = 1 if state == `s' & msa_tot == 0;
};

// variable with a unique value for each location (clustering standard errors and FE); 
g location_id = .;
forval i = 1/794 {;
	replace location_id = `i' if cbsa_dum`i' == 1;
};
forval i = 1/51 {;
	replace location_id = `i'+794 if nmst_dum`i' == 1;
};

// state x year for fixed effects ;
gen styr = year*100 + state ;

forval i = 1/51 { ;
gen stdum`i' = 0 ;
replace stdum`i' = 1 if state == `i' ;
} ;

#delimit cr

////////////////////////////////////////////////////////////////////////////////
** create indicator of if included in regressions i.e. key variables are not missing
*regression commands will automatically drop these observations, but want to get 
* an accurate sample when creating summmary stats

#delimit ; 
 
foreach bh in npl_a re_a cam {;
	if "`bh'"!="cam" local p "50" ;
	if "`bh'"=="cam" local p "wa" ;
	
	if "`dataset'"=="hw_fw"  {;
		*Everyone (MSA and non-MSA);
		g every_dum_`bh' = 1 if ln_psidc_food !=. &  ln_famincrat!=. &  hewr!=. &  tfwr!=. & `bh'_`p'_ev!=. & hp_ev!=. & ageh!=. & age2!=. & dualhh!=. & famsize!=. & famsizessq!=. & bartik_empgr_back_ev !=.;
	};
	
	if "`dataset'"=="hw" {;
		*Everyone (MSA and non-MSA);
		g every_dum_`bh' = 1 if ln_psidc_food !=. &  ln_famincrat!=. &  hewr!=. &  `bh'_`p'_ev!=. & hp_ev!=. & ageh!=. & age2!=.  & dualhh!=. & famsize!=. & famsizessq!=. & bartik_empgr_back_ev!=.;

	};
};

*replicate every_dum_`bh' to use for counting (obsdrop);
g every_dum_re_a_dup = every_dum_re_a; 

*Replacing observations with missing if singleton: the household has less than 2 
* observations or location has less than 2 observations;
*regression automatically drops these, but want to account for sum stats;
* for constraint variables (haven't been created yet in this do file); 
foreach bh in npl_a re_a cam {;

	g x = 1;
	while x != 0 {;
	
		*count observations by unique and replace sample dummy with missing if fewer than 2 observations; 
		bysort unique: egen every_dum_`bh'_obs = total(every_dum_`bh');
		replace every_dum_`bh' = . if every_dum_`bh'_obs<2;
		
		*do the same with observations by location; 
		bys location_id: egen location_`bh'_obs = total(every_dum_`bh');
		replace every_dum_`bh' = . if location_`bh'_obs < 2;
		
		*another loop? did dropping location singletons create more HH singletons?;
		drop every_dum_`bh'_obs;
		bysort unique: egen every_dum_`bh'_obs = total(every_dum_`bh');
		count if every_dum_`bh'_obs == 1;
		replace x = r(N);
		
		drop every_dum_`bh'_obs location_`bh'_obs ;
	};
	
	drop x;
};

*Versions of these variables for multi-bank measures;
foreach bh in npl_a re_a cam {;
	g every_dum_`bh'_bhc10rf = every_dum_`bh';
	g every_dum_`bh'_bhc5rf = every_dum_`bh';

	if "`bh'" != "cam" local multi _50;
	if "`bh'" == "cam" local multi _wa;

	replace every_dum_`bh'_bhc10rf = . if ln_l4_LocShrM05bhc_100refix_ev < ln(10) | `bh'`multi'_5m_bhc_ev == . ;
	replace every_dum_`bh'_bhc5rf = . if ln_l4_LocShrM05bhc_100refix_ev < ln(5) | `bh'`multi'_5m_bhc_ev == .;
};


foreach bh in npl_a re_a cam {;

	foreach multi in bhc {;
	
		foreach p in 10rf 5rf {;

			g x = 1;
			while x != 0 {;
			
				*count observations by unique and replace sample dummy with missing if fewer than 2 observations; 
				bysort unique: egen every_dum_`bh'_`multi'`p'_obs = total(every_dum_`bh'_`multi'`p');
				replace every_dum_`bh'_`multi'`p' = . if every_dum_`bh'_`multi'`p'_obs<2;
				
				*do the same with observations by location; 
				bys location_id: egen location_`bh'_`multi'`p'_obs = total(every_dum_`bh'_`multi'`p');
				replace every_dum_`bh'_`multi'`p' = . if location_`bh'_`multi'`p'_obs < 2;
				
				*another loop? did dropping location singletons create more HH singletons?;
				drop every_dum_`bh'_`multi'`p'_obs;
				bysort unique: egen every_dum_`bh'_`multi'`p'_obs = total(every_dum_`bh'_`multi'`p');
				count if every_dum_`bh'_`multi'`p'_obs == 1;
				replace x = r(N);
				
				drop every_dum_`bh'_`multi'`p'_obs location_`bh'_`multi'`p'_obs ;
			};
			
			drop x;
		};
	};
};

di "`dataset'";

save `data'/`filename'_`dataset'_pre_15.dta, replace;
 
********************************************************************************;
}; /*if `dataset' {*/ /*cleaning section*/
}; /*foreach dataset in fw hw {*/ /*cleaning section*/
};/*if cleaning*/ /*cleaning section*/

*******************************************************************************
#delimit ;
if variables {;

foreach dataset in hw hw_fw {;
if `dataset' {;

preserve;

if make {;
use `data'/`filename'_`dataset'_pre_15.dta, clear;
	
ren *nmst* *nst*;

/* ////////////////////////////////////////////////////////////////////////// */
* Scaling ;

// scaling  wealth;
foreach var in totwr totar totdr {;
	g `var'_100 = `var'/100;
	drop `var' ;
};

// adjusting bankhealth variables by dividing bly 100 because coefficents too small otherwise, hard to interpret;
foreach n in msa nst ev {;
	foreach bh in re_a npl_a {;
	local measure 50 ;
		foreach p in `measure' {;
			foreach t in 5m 10m {;
				foreach b in bhc {;
					replace `bh'_`p'_`t'_`b'_`n' = `bh'_`p'_`t'_`b'_`n'/100;					
				};
			};
		
			replace `bh'_`p'_`n' = `bh'_`p'_`n'/100;
		};
	};
};

/* ////////////////////////////////////////////////////////////////////////// */
** Constraint definitions
////////////
*Creating deviation dummies for liquid assets/income;
*value of one if have liquid assets over percentile threshold i.e. not constrained;

** Need at least 2 observations for constrained for fixed effects **

*drop if less than 2 non-missing liqw observations;
foreach bh in re_a cam {;

	g lfwr_combo_`bh' = lfwr_combo;
	bysort unique: egen lfwr_combo_obs_`bh' = total(every_dum_`bh'==1) if lfwr_combo !=.;
	replace lfwr_combo_`bh' = . if lfwr_combo_obs_`bh' <2;

	g lfwr_`bh' = lfwr;
	bysort unique: egen lfwr_obs_`bh' = total(every_dum_`bh'==1) if lfwr !=.;
	replace lfwr_`bh' = . if lfwr_obs_`bh' <2;
};

**50%**;
foreach bh in re_a cam {;
	g liqw_dev_50_`bh' = 0;	
	qui su lfwr_combo_`bh', de;
	replace liqw_dev_50_`bh' = 1 if lfwr_combo_`bh' >= r(p50) & lfwr_combo_`bh' != . ;
	replace liqw_dev_50_`bh' = . if lfwr_combo_`bh' == .;
};

foreach bh in re_a cam {;
	g liqw_dev_50_nar_`bh' = 0;
	qui su lfwr_`bh', de;
	replace liqw_dev_50_nar_`bh' = 1 if lfwr_`bh' >= r(p50) & lfwr_`bh' != . ;
	replace liqw_dev_50_nar_`bh' = . if lfwr_`bh' == .;
};

////////////
**Constraint dummies based on age earning profiles;
*these constraints are pre-lagged to match with lagged income etc.;
		
*NEG DEV;
g const_alpha_dev = 0;

*missing if no deviation measure;
replace const_alpha_dev = . if hper_dev_med == . & wife == 0;
	*if single, then missing if head is missing;
replace const_alpha_dev = . if hper_dev_med == . & wper_dev_med == . & m_se == 1;
	*if single earner, then one of these is always missing. so entire thing missing if both;
replace const_alpha_dev = . if hhper_dev_med == . & m_de == 1;		
	*if dual earner, then missing if HH missing i.e. either head or wife missing; 
	
*1 if neg dev;
replace const_alpha_dev = 1 if hper_dev_med < 0 & wife == 0; 
	*if single, constrained if head constrained;
replace const_alpha_dev = 1 if (hper_dev_med < 0 | wper_dev_med < 0) & m_se == 1;
	*if married se, constrained if either is constrained (other will be missing);
replace const_alpha_dev = 1 if hhper_dev_med < 0 & m_de == 1; 
	*if married de, constrained if HH is constrained; 
	
*5% NEG DEV;
g const_alpha_dev_5 = 0;

*missing if no deviation measure; 
replace const_alpha_dev_5 = . if hper_dev_med == . & wife == 0;
	*if single, then missing if head is missing;
replace const_alpha_dev_5 = . if hper_dev_med == . & wper_dev_med == . & m_se == 1;
	*if single earner, then one of these is always missing. so entire thing missing if both;
replace const_alpha_dev_5 = . if hhper_dev_med == . & m_de == 1;		
	*if dual earner, then missing if HH missing i.e. either head or wife missing; 
	
*missing if between 0 and -5;
replace const_alpha_dev_5 = . if hper_dev_med < 0 & hper_dev_med > -5 & wife ==0; 
replace const_alpha_dev_5 = . if ((hper_dev_med < 0 & hper_dev_med > -5) | (wper_dev_med < 0 & wper_dev_med > -5)) & m_se == 1;
replace const_alpha_dev_5 = . if hhper_dev_med < 0 & hhper_dev_med > -5 & m_de == 1; 

*1 if -5 or less;
replace const_alpha_dev_5 = 1 if hper_dev_med <= -5 & wife == 0; 
	*if single, constrained if head constrained;
replace const_alpha_dev_5 = 1 if (hper_dev_med <= -5 | wper_dev_med <= -5) & m_se == 1;
	*if married se, constrained if either is constrained (other will be missing);
replace const_alpha_dev_5 = 1 if hhper_dev_med <= -5 & m_de == 1; 
	*if married de, constrained if HH is constrained; 

*mark if household is under `age_cut';
*If single, then head age;
*if single earner, then earner's age;
*if dual earner, then both head and wife must be under `age_cut'; 
	
g under_`age_cut' = 0;
replace under_`age_cut' = 1 if ageh < `age_cut' & wife == 0;
replace under_`age_cut' = 1 if ageh < `age_cut' & m_se == 1 & hearn == 1;
replace under_`age_cut' = 1 if agew < `age_cut' & m_se == 1 & wearn == 1;
replace under_`age_cut' = 1 if ageh < `age_cut' & agew < `age_cut' & m_de == 1;

* Low liquid assets or Under `age_cut' ;
foreach bh in re_a cam {;
gen constrained_llw`age_cut'_`bh' = 0 ;
replace constrained_llw`age_cut'_`bh' = 1 if liqw_dev_50_`bh' == 0 | (under_`age_cut' == 1 & liqw_dev_50_`bh' != .) ;

gen unconstrained_llw`age_cut'_`bh' = 0 ;
replace unconstrained_llw`age_cut'_`bh' = 1 if liqw_dev_50_`bh' == 1 & under_`age_cut' == 0 ;

* just under age cut alone ;
gen constrained_ju`age_cut'_`bh' = 0 ;
replace constrained_ju`age_cut'_`bh' = 1 if under_`age_cut' == 1 ;
gen unconstrained_ju`age_cut'_`bh' = 0 ;
replace unconstrained_ju`age_cut'_`bh' = 1 if under_`age_cut' == 0 ;

} ;

////////////
***create interactions variables for constrained***;

*DUMMIES FOR IF CONSTRAINED; 
		
foreach bh in re_a cam {;
	*no npl because using re sample for npl measure; 
	
	foreach v in negdev 5 {;
		if "`v'" == "negdev" local ver;
		if "`v'" == "5" local ver _5;

		g constrained`ver'_`bh' = 0;
		replace constrained`ver'_`bh' = 1 if const_alpha_dev`ver' == 1;
		
		g unconstrained`ver'_`bh' = 0;
		replace unconstrained`ver'_`bh' = 1 if const_alpha_dev`ver' == 0;
				
		*this is equivalent to missing if const_alpha_dev == .;
		replace constrained`ver'_`bh' = . if constrained`ver'_`bh' != 1 & unconstrained`ver'_`bh' != 1;
		replace unconstrained`ver'_`bh' = . if constrained`ver'_`bh' != 1 & unconstrained`ver'_`bh' != 1;
				
		*missing if fewer than 2 constrained versus unconstrained observations;
		bys unique: egen constrained_obs`ver'_`bh' = total(every_dum_`bh' == 1) if constrained`ver'_`bh' != . & unconstrained`ver'_`bh' != .; 
		replace constrained`ver'_`bh' = . if constrained_obs`ver'_`bh' < 2;		
		replace unconstrained`ver'_`bh' = . if constrained_obs`ver'_`bh' < 2;		

		*`age_cut' measures: missing only if can't determine if constrained or unconstrained;
			*means won't have constrained missing where age < `age_cut' st in regressions, can't estimate for those obs since both in reg;
			*if no constraint measure, but less than `age_cut' so constrained. then unconstrained = 0 not missing.; 
				*thus will have income, bh etc. observations for those variables that are constrained and included in reg; 
				*if no constraint measure and over 34, then both missing; 
		*If single, then `age_cut' is head age;
		*if single earner, then `age_cut' is earner's age;
		*if dual earner, then both head and wife must be under `age_cut'; 
		**keep same sample: don't add people that don't have deviation estimates; 
					
		g constrained_`age_cut'`ver'_`bh' = 0; 
		replace constrained_`age_cut'`ver'_`bh' = 1 if const_alpha_dev`ver'  == 1 | (under_`age_cut' == 1 & const_alpha_dev`ver' != .);
		
		g unconstrained_`age_cut'`ver'_`bh' = 0;
		replace unconstrained_`age_cut'`ver'_`bh' = 1 if const_alpha_dev`ver' == 0 & under_`age_cut' == 0; 
			
		replace constrained_`age_cut'`ver'_`bh' = . if constrained_`age_cut'`ver'_`bh' != 1 & unconstrained_`age_cut'`ver'_`bh' != 1;
		replace unconstrained_`age_cut'`ver'_`bh' = . if constrained_`age_cut'`ver'_`bh' != 1 & unconstrained_`age_cut'`ver'_`bh' != 1;
			
		*missing if fewer than 2 constrained versus unconstrained observations;
		bys unique: egen constrained_`age_cut'`ver'_obs_`bh' = total(every_dum_`bh' == 1) if constrained_`age_cut'`ver'_`bh' != . & unconstrained_`age_cut'`ver'_`bh' != .; 
		replace constrained_`age_cut'`ver'_`bh' = . if constrained_`age_cut'`ver'_obs_`bh' < 2;		
		replace unconstrained_`age_cut'`ver'_`bh' = . if constrained_`age_cut'`ver'_obs_`bh' < 2;		
		
		////////////////
		*Liquid asset measures: `age_cut' measure with high versus low liquid assets ;
		*missing if can't determine if any of these 4 so can all be in the regression;
			
		*AGE;
		g constrained_`age_cut'_hlw`ver'_`bh' = 0;
		replace constrained_`age_cut'_hlw`ver'_`bh' = 1 if (const_alpha_dev`ver' == 1 | (under_`age_cut' == 1 & const_alpha_dev`ver' != .)) & liqw_dev_50_`bh' == 1;
				
		g constrained_`age_cut'_llw`ver'_`bh' = 0;
		replace constrained_`age_cut'_llw`ver'_`bh' = 1 if (const_alpha_dev`ver' == 1 | (under_`age_cut' == 1 & const_alpha_dev`ver' != .)) & liqw_dev_50_`bh' == 0;
				
		g unconstrained_`age_cut'_hlw`ver'_`bh' = 0;
		replace unconstrained_`age_cut'_hlw`ver'_`bh' = 1 if const_alpha_dev`ver' == 0 & under_`age_cut' == 0 & liqw_dev_50_`bh' == 1;
				
		g unconstrained_`age_cut'_llw`ver'_`bh' = 0;
		replace unconstrained_`age_cut'_llw`ver'_`bh' = 1 if const_alpha_dev`ver' == 0 & under_`age_cut' == 0 & liqw_dev_50_`bh' == 0;
			
		foreach var in unconstrained constrained {;
			foreach m in h l {;
				replace `var'_`age_cut'_`m'lw`ver'_`bh' = . if constrained_`age_cut'_hlw`ver'_`bh' != 1 & constrained_`age_cut'_llw`ver'_`bh' != 1 & unconstrained_`age_cut'_hlw`ver'_`bh' != 1 & unconstrained_`age_cut'_llw`ver'_`bh' != 1;
			};
		};
			
		*missing if fewer than 2 constrained versus unconstrained observations;
		bys unique: egen constrained_`age_cut'_hlw`ver'_obs_`bh' = total(every_dum_`bh' == 1) if constrained_`age_cut'_hlw`ver'_`bh' != .; /*all have same missing, so just use one*/ 
					
		foreach var in unconstrained constrained {;
			foreach m in h l {;
				replace `var'_`age_cut'_`m'lw`ver'_`bh' = . if constrained_`age_cut'_hlw`ver'_obs_`bh' < 2;	
			};
		};
				
		*NO AGE;
		g constrained_hlw`ver'_`bh' = 0;
		replace constrained_hlw`ver'_`bh' = 1 if const_alpha_dev`ver' == 1 & liqw_dev_50_`bh' == 1;
				
		g constrained_llw`ver'_`bh' = 0;
		replace constrained_llw`ver'_`bh' = 1 if const_alpha_dev`ver' == 1  & liqw_dev_50_`bh' == 0;
				
		g unconstrained_hlw`ver'_`bh' = 0;
		replace unconstrained_hlw`ver'_`bh' = 1 if const_alpha_dev`ver' == 0 & liqw_dev_50_`bh' == 1;
				
		g unconstrained_llw`ver'_`bh' = 0;
		replace unconstrained_llw`ver'_`bh' = 1 if const_alpha_dev`ver' == 0 & liqw_dev_50_`bh' == 0;
			
		foreach var in unconstrained constrained {;
			foreach m in h l {;
				replace `var'_`m'lw`ver'_`bh' = . if constrained_hlw`ver'_`bh' != 1 & constrained_llw`ver'_`bh' != 1 & unconstrained_hlw`ver'_`bh' != 1 & unconstrained_llw`ver'_`bh' != 1;
			};
		};
			
		*missing if fewer than 2 constrained versus unconstrained observations;
		bys unique: egen constrained_hlw`ver'_obs_`bh' = total(every_dum_`bh' == 1) if constrained_hlw`ver'_`bh' != .; /*all have same missing, so just use one*/ 
					
		foreach var in unconstrained constrained {;
			foreach m in h l {;
				replace `var'_`m'lw`ver'_`bh' = . if constrained_hlw`ver'_obs_`bh' < 2;	
			};
		};
	};
};

* need to shorten bartik name ;
ren bartik_empgr_back_ev_w1 bartik_ebev_w1 ;
ren bartik_empgr_for_ev_w1 bartik_efev_w1 ;
ren hp_ev_w1 hp_ev_w1 ;

/* ////////////////////////////////////////////////////////////////////////// */
* Interacting all regression variables with multiple definitions of constrained and liquid asset holdings ;
/* /////////////////////// */
*BANK HEALTH;
foreach bh in re_a cam npl_a {;

	if "`bh'"!="cam" local measure 50 ;
	if "`bh'"=="cam" local measure wa ;
		
	local bh1 `bh';
	*For NPL use RE sample (recall: imposing the same sample for npl and renpl so directly comparable) ;
	if "`bh'"=="npl" | "`bh'"=="npl_a" local bh1 re;
					
	foreach p in `measure' {;
				
		*constrained v unconstrained; 
		foreach c in c u {;
			if "`c'" == "c" local const constrained;
			if "`c'" == "u" local const unconstrained;
	
			* low liquid assets, just under age_cut ;
			foreach w in llw`age_cut' llw ju`age_cut' { ;
				g `bh'_`p'_ev_`c'_`w' = `bh'_`p'_ev * `const'_`w'_`bh1';

				*multi BH versions; 
				foreach t in 5m 10m {; 
					*multi BH: bank vs bank holding company; 
					foreach b in bhc {;	
						g `bh'_`p'_`t'_`b'_ev_`c'_`w' = `bh'_`p'_`t'_`b'_ev * `const'_`w'_`bh1';				
					};
				};
			} ;
			
			foreach age in `age_cut' not {;
					
				if "`age'" == "`age_cut'" local a _`age_cut';
				if "`age'" == "`age_cut'" local `a' "`age_cut'";
				if "`age'" == "not" local a;
				
				*negative deviation vs 5% or greater neg dev; 
				foreach v in negdev 5 {;
					if "`v'" == "negdev" local ver;
					if "`v'" == "5" local ver _5;
					
					g `bh'_`p'_ev_`c'``a''`ver' = `bh'_`p'_ev * `const'`a'`ver'_`bh1';
							
					foreach t in 5m 10m {;
						foreach b in bhc {;
							g `bh'_`p'_`t'_`b'_ev_`c'``a''`ver' = `bh'_`p'_`t'_`b'_ev * `const'`a'`ver'_`bh1';
						} ;
					} ;
					
					* constrained x LA ;
					foreach f in h l {;	
						g `bh'_`p'_ev_`c'`f'`a'`ver' = `bh'_`p'_ev * `const'`a'_`f'lw`ver'_`bh1';
				
						foreach t in 5m 10m {;
							foreach b in bhc {;
								g `bh'_`p'_`t'_`b'_ev_`c'`f'`a'`ver' = `bh'_`p'_`t'_`b'_ev * `const'`a'_`f'lw`ver'_`bh1';
							} ;
						};
					};
				} ;
			} ;
		};
	};
};

*OTHER VARIABLES;
* npl_a uses re_a sample ;
foreach bh in re_a cam {;

	foreach var in ln_famincrat hp_ev_w1 bartik_ebev_w1 totwr_100 totar_100 totdr_100 {;

		foreach c in c u {;
			if "`c'" == "c" local const constrained;
			if "`c'" == "u" local const unconstrained;
			
			foreach w in llw`age_cut' llw ju`age_cut' { ;
				g `var'_`bh'_`c'_`w' = `var' * `const'_`w'_`bh';
			} ;

			foreach age in `age_cut' not {;
				if "`age'" == "`age_cut'" local a _`age_cut';
				if "`age'" == "`age_cut'" local `a' `age_cut';
				if "`age'" == "not" local a;
								
				foreach v in negdev 5 {;
					if "`v'" == "negdev" local ver;
					if "`v'" == "5" local ver _5;
							
					g `var'_`bh'_`c'``a''`ver' = `var' * `const'`a'`ver'_`bh';
						
					foreach f in h l {;	
						g `var'_`bh'_`c'`f'`a'`ver' = `var' * `const'`a'_`f'lw`ver'_`bh';
					};
				};
			};
		};
	};
};

* BH Quintiles ;
foreach bh in re_a npl_a {;
	xtile qn_`bh'_50_5mbhcev =  `bh'_50_5m_bhc_ev, n(5) ;
	forval i = 1/5 { ;
		gen qn`i'_`bh'_50_5mbhcev = 0 if qn_`bh'_50_5mbhcev  != . ;
		replace qn`i'_`bh'_50_5mbhcev = 1 if qn_`bh'_50_5mbhcev  == `i' ;
					
		gen vl_qn`i'_`bh'_50_5mbhcev = avgsqdev*qn`i'_`bh'_50_5mbhcev ;
	} ;
};

* VOLATILITY, BH Qint interactions ;
gen volainc = ln_famincrat*avgsqdev ;

foreach bh in re_a npl_a cam {;
	if "`bh'"!="cam" local measure 50 ;
	if "`bh'"=="cam" local measure wa ;
	
	local bh1 `bh';
	*For NPL use RE sample;
	if "`bh'"=="npl" | "`bh'"=="npl_a" local bh1 re;
		
	foreach p in `measure' {;	
		gen vl_`bh'_`p'_ev = avgsqdev*`bh'_`p'_ev ;						
				
		foreach t in 5m 10m { ;
			foreach b in bhc {;
				gen vl_`bh'_`p'_`t'`b'ev = avgsqdev*`bh'_`p'_`t'_`b'_ev ;
			};
		};
		
		foreach c in c u {;
			if "`c'" == "c" local const constrained;
			if "`c'" == "u" local const unconstrained;
			
			foreach w in llw`age_cut' llw ju`age_cut' { ;
				g vl_`bh'_`p'_ev_`c'_`w' = vl_`bh'_`p'_ev * `const'_`w'_`bh1';
				
				foreach t in 5m 10m {;
					foreach b in bhc {;
						g vl_`bh'_`p'_`t'`b'ev_`c'_`w' = vl_`bh'_`p'_`t'`b'ev * `const'_`w'_`bh1';
					};
				};
			} ;

			foreach age in `age_cut' not {;	
				if "`age'" == "`age_cut'" local a _`age_cut';
				if "`age'" == "`age_cut'" local `a' "`age_cut'";
				if "`age'" == "not" local a;

				foreach v in negdev 5 {;
					if "`v'" == "negdev" local ver;
					if "`v'" == "5" local ver _5;
						
								
			di "`a'`ver'" ;
			
					g vl_`bh'_`p'_ev_`c'``a''`ver' = vl_`bh'_`p'_ev * `const'`a'`ver'_`bh1';
							
					foreach t in 5m 10m {;
						foreach b in bhc {;
							g vl_`bh'_`p'_`t'`b'ev_`c'``a''`ver' = vl_`bh'_`p'_`t'`b'ev* `const'`a'`ver'_`bh1';
						} ;
					} ;
					
					if "`bh'"!="cam" { ;
					forval i = 1/5 { ;
						g qn`i'_`bh'_`p'_5mbhcev`c'``a''`ver' = qn`i'_`bh'_`p'_5mbhcev*`const'`a'`ver'_`bh1';
						g vl_qn`i'_`bh'_`p'_5mbhcev`c'``a''`ver' = vl_qn`i'_`bh'_`p'_5mbhcev*`const'`a'`ver'_`bh1';
					} ;
					} ;							
						foreach f in h l {;	
							g vl_`bh'_`p'_ev_`c'`f'`a'`ver' = vl_`bh'_`p'_ev*`const'`a'_`f'lw`ver'_`bh1';
	
							foreach t in 5m 10m {;
								foreach b in bhc {;
									g vl_`bh'_`p'_`t'`b'ev_`c'`f'`a'`ver' = vl_`bh'_`p'_`t'`b'ev* `const'`a'_`f'lw`ver'_`bh1';
								};
							};
							
							if "`bh'"!="cam" { ;
							forval i = 1/5 { ;
								g qn`i'_`bh'_`p'_5mbhcev`c'`f'`a'`ver' = qn`i'_`bh'_`p'_5mbhcev*`const'`a'_`f'lw`ver'_`bh1';
								g vl_qn`i'_`bh'_`p'_5mbhcev`c'`f'`a'`ver' = vl_qn`i'_`bh'_`p'_5mbhcev*`const'`a'_`f'lw`ver'_`bh1';
							} ;
							} ;
						};
				};
			};
		};
	};			
};

//////////////////

save `data'/`filename'_`dataset'_lw_nolab``age_cut''_15.dta, replace;
} ;

if label {;

/* ////////////////////////////////////////////////////////////////////////// */
** labeling regression variables for output ; 

use `data'/`filename'_`dataset'_lw_nolab``age_cut''_15.dta, clear ;

local b;
foreach bh in re_a npl_a cam {;
	if "`bh'" == "cam" local measure wa ;
	if "`bh'" != "cam" local measure 50 ; 
	
	foreach p in `measure' {;
	if regexm("`bh'", "re") & "`p'" == "50" {;
		local b RENPL50;
	};
	if regexm("`bh'", "npl") & "`p'" == "50" {;
		local b NPL50;
	};
	if regexm("`bh'", "cam") & "`p'" == "wa" {;
		local b CAMELS;
	};
			
		la var `bh'_`p'_ev "`b'";
		la var vl_`bh'_`p'_ev "Inc. Volatility x `b'";
		
		foreach t in 5m 10m {;
			foreach bd in bhc {;
				la var `bh'_`p'_`t'_`bd'_ev "ML `b'";
				la var vl_`bh'_`p'_`t'`bd'ev "Inc. Volatility x ML `b'";
			} ;
		} ;
		
		if "`bh'"=="npl_a" { ;
		if "`p'"=="50" { ;
		lab def qn_`bh'_`p'_5mbhcev 
			1 "ML `b'`type': Quint.1"
			2 "ML `b'`type': Quint.2"
			3 "ML `b'`type': Quint.3"
			4 "ML `b'`type': Quint.4"
			5 "ML `b'`type': Quint.5"
			;
		lab val qn_`bh'_`p'_5mbhcev qn_`bh'_`p'_5mbhcev ;
		
		forval i = 1/5 { ;
			la var qn`i'_`bh'_`p'_5mbhcev "ML `b' Quint.`i'";
			la var vl_qn`i'_`bh'_`p'_5mbhcev "Inc. Vol. x ML `b' Quint.`i'";
		} ;
		} ;
		} ;
		
	* Constrained ;
	foreach con in c u {;	
		
		foreach w in llw`age_cut' llw ju`age_cut' { ;
			la var `bh'_`p'_ev_`con'_`w' "`b'";			
			la var vl_`bh'_`p'_ev_`con'_`w' "Inc. Volatility x `b'";

			foreach t in 5m 10m {;
				foreach bd in bhc {;
					la var `bh'_`p'_`t'_`bd'_ev_`con'_`w' "ML `b'`";
					la var vl_`bh'_`p'_`t'`bd'ev_`con'_`w' "Inc. Volatility x ML `b'";
				};
			};
		
			if "`bh'"!="npl_a" { ;
				la var totwr_100_`bh'_`con'_`w' "Total Wealth/Income";
				la var totar_100_`bh'_`con'_`w' "Total Assets/Income";
				la var totdr_100_`bh'_`con'_`w' "Total Debt/Income";
				la var ln_famincrat_`bh'_`con'_`w' "Log Income";
				la var hp_ev_w1_`bh'_`con'_`w' "House Price Growth";
				la var bartik_ebev_w1_`bh'_`con'_`w' "Bartik Empl. Growth";
			} ;
		} ;
		
		foreach age in `age_cut' not {;
					
			if "`age'" == "`age_cut'" local a _`age_cut';
			if "`age'" == "`age_cut'" local `a' `age_cut';			
			if "`age'" == "not" local a;
			
			if "`c'" == "c" local const constrained;
			if "`c'" == "u" local const unconstrained;

			
			foreach v in negdev 5 {;
				if "`v'" == "negdev" local ver;
				if "`v'" == "5" local ver _5;			
								
				la var `bh'_`p'_ev_`con'``a''`ver' "`b'";
				la var vl_`bh'_`p'_ev_`con'``a''`ver' "Inc. Volatility x `b'";

				foreach t in 5m 10m {;
					foreach bd in bhc {;
						la var `bh'_`p'_`t'_`bd'_ev_`con'``a''`ver' "ML `b'";
						la var vl_`bh'_`p'_`t'`bd'ev_`con'``a''`ver' "Inc. Volatility x ML `b'";

					} ;
				};
				
								
				if "`bh'"!="npl_a" { ;
					la var totwr_100_`bh'_`con'``a''`ver' "Total Wealth/Income";
					la var totar_100_`bh'_`con'``a''`ver' "Total Assets/Income";
					la var totdr_100_`bh'_`con'``a''`ver' "Total Debt/Income";
					la var ln_famincrat_`bh'_`con'``a''`ver' "Log Income";
					la var hp_ev_w1_`bh'_`con'``a''`ver' "House Price Growth";
					la var bartik_ebev_w1_`bh'_`con'``a''`ver' "Bartik Empl. Growth";
				} ;
				
				if "`bh'"=="npl_a" { ;
				forval i = 1/5 { ;
					la var qn`i'_`bh'_`p'_5mbhcev`con'``a''`ver' "ML `b' Quint.`i'";
					la var vl_qn`i'_`bh'_`p'_5mbhcev`con'``a''`ver' "Inc. Vol. x ML `b' Quint.`i'";
				} ;
				} ;
					
					* Constrained x LA ;
					foreach f in h l {;
						la var `bh'_`p'_ev_`con'`f'`a'`ver' "`b'";
						la var vl_`bh'_`p'_ev_`con'`f'`a'`ver' "Inc. Volatility x `b'";
							
						foreach t in 5m 10m {;
							foreach bd in bhc {;
								la var `bh'_`p'_`t'_`bd'_ev_`con'`f'`a'`ver' "ML `b'";
								la var vl_`bh'_`p'_`t'`bd'ev_`con'`f'`a'`ver' "Inc. Volatility x ML `b'";
							} ;
						} ;
						
						if "`bh'"!="npl_a" { ;
							la var totwr_100_`bh'_`con'`f'`a'`ver' "Total Wealth/Income";
							la var totar_100_`bh'_`con'`f'`a'`ver' "Total Assets/Income";
							la var totdr_100_`bh'_`con'`f'`a'`ver' "Total Debt/Income";
							la var ln_famincrat_`bh'_`con'`f'`a'`ver' "Log Income";					
							la var hp_ev_w1_`bh'_`con'`f'`a'`ver' "House Price Growth";
							la var bartik_ebev_w1_`bh'_`con'`f'`a'`ver' "Bartik Empl. Growth";
						} ;
						
						if "`bh'"=="npl_a" { ;
						forval i = 1/5 { ;
							la var qn`i'_`bh'_`p'_5mbhcev`con'`f'`a'`ver' "ML `b' Quint.`i'";
							la var vl_qn`i'_`bh'_`p'_5mbhcev`con'`f'`a'`ver' "Inc. Vol. x ML `b' Quint.`i'";
						} ;
						} ;
					};
			};
		};
			

	};
	};
} ;

la var totwr_100 "Total Wealth/Income";
la var totar_100 "Total Assets/Income";
la var totdr_100 "Total Debt/Income";
la var hp_ev_w1 "House Price Growth";
la var bartik_ebev_w1 "Bartik Empl. Growth";
la var bartik_efev_w1 "Bartik Empl. Growth";
la var ageh "Age";
la var age2 "Age$^2/1000$" ;
la var famsize "Family Size";
la var famsizessq "Family Size Squared";
la var m_se "Single Earner";
la var m_de "Dual Earner";
la var ln_famincrat "Log Income";
la var rfood "Food Consumption";
la var famincrat "Income";
la var pexpn "Broader PSID Consumption Measure";
la var bpp "Imputed Nondurable Consumption";
la var renter "Renter Dummy";

/////
*SAVE;
save `data'/`filename'_`dataset'_lw_new``age_cut''_15.dta, replace;
} ;

restore ;

}; /*if `dataset' -- variables section*/
}; /*foreach dataset in fw hw -- variables section*/

}; /*if variables -- variables section*/

********************************************************************************
